* This do-file compiles the final dataset used in estimation.do

* Scratch directory and project data directory
global tmp "/tmp"
cd /users/andreas/dropbox/work/patstat/data

* Alternative locations (disabled)
*cd "C:\Users\federc\Dropbox\PATSTAT\DATA"
*global tmp "C:\Users\federc\Dropbox\TEMP"
*global dta "D:\Patstat data"

* ------------------------------------------
* Patent data: Main sample - 1992-
* ------------------------------------------

* Firms must have positive patenting in some year after this cutoff to enter
* the main sample (values used so far: 1991/2003/2008)
global cutoff 1991

* Build the hrm-headq lookup table: one row per distinct pair, skipping rows
* whose headq is empty or ".."
use patentsfam3, clear
keep if !inlist(headq, "", "..")
keep hrm headq
contract hrm headq
drop _freq
save /tmp/headq, replace

* Reload the raw data; from here on p holds the former granted variable
use patentsfam3, clear
drop p
ren granted p

* Keep firms with at least one positive p in a year after the cutoff.
* Missing p is excluded explicitly: Stata ranks missing above every number,
* so a bare p>0 is true for missing values, whereas the fill step below
* treats missing as zero.
gen tmp = p>0 & !missing(p) & y>$cutoff
egen survive = max(tmp), by(hrm)
keep if survive==1
drop tmp survive

tsset hrm y
tsfill, full  // Add the absent firm-years; their counts are set to zero below
*foreach vv of varlist p triadic granted {
foreach vv of varlist p triadic {
  replace `vv'=0 if `vv'==.
}

* Cumulative sums within firm, accumulated in year order.
* sum() is a running sum over the current row order, so y is given as an
* explicit sort key instead of relying on whatever order earlier commands left.
bysort hrm (y): gen P = sum(p)
bysort hrm (y): gen Ptri = sum(triadic)
*bys hrm: gen Pgra = sum(granted)
bysort hrm (y): gen C = sum(p*citations3year)
gen Cbar = C/P  // missing while P is still zero
bysort hrm (y): gen M = sum(p*mkts)
gen Mbar = M/P
drop M
bysort hrm (y): gen I = sum(p*num_inventors)
gen Ibar = I/P
bysort hrm (y): gen IPC = sum(p*num_ipc)
gen IPCbar = IPC/P

label var P "Cumulative patenting since 1965"
label var Ptri "Cumulative triadic patenting since 1965"
*label var Pgra "Cumulative granted patenting since 1965"
label var Cbar "Cumulative avg 3-year citations since 1965, relative to cumulative # patents"
label var Mbar "Cumulative # markets protected, relative to cumulative # patents"
label var Ibar "Cumulative # inventors, relative to cumulative # patents"
label var IPCbar "Cumulative # IPC codes, relative to cumulative # patents"

* Firm-level baselines: the 1985 values of P and Cbar, copied to every year
* of the firm as P85 and Cbar85
foreach base in P Cbar {
  generate tmp = `base' if y==1985
  egen `base'85 = max(tmp), by(hrm)
  drop tmp
}
*gen tmp = Pgra if y==1985
*egen Pgra85 = max(tmp), by(hrm)
*drop tmp

* Restrict the panel to the years 1980-2009
keep if inrange(y, 1980, 2009)

* Rebuild headq from the lookup table; firms absent from the table are dropped
drop headq
merge m:1 hrm using /tmp/headq, keep(match) nogenerate
*save DD$cutoff, replace
save DDGranted$cutoff, replace

* Save firm-level P85 lookup tables (one row per distinct hrm-P85 pair).
* P85 is used in weights_wic5.do.
* NOTE(review): the first source file (DD plus cutoff, without the Granted
* suffix) is not written by this script because its save line is commented
* out, so it must already exist on disk - confirm it is up to date.
local sources DD$cutoff DDGranted$cutoff
local targets P85 P85Granted
forvalues i = 1/2 {
  local src : word `i' of `sources'
  local dst : word `i' of `targets'
  use `src', clear
  contract hrm P85
  drop _freq
  save `dst', replace
}

* ------------------------------------------
* Patent data: Placebo - 1980-1990
* ------------------------------------------

* Placebo sample: same construction as the main sample, but on data up to
* 1990 and with p left as it is stored in patentsfam3
use patentsfam3, clear

*drop p
*ren granted p

keep if y<=1990 
global cutoffPlacebo 1979

* Keep firms with at least one positive p after the placebo cutoff.
* Missing p is excluded explicitly: Stata ranks missing above every number,
* so a bare p>0 is true for missing values, whereas the fill step below
* treats missing as zero.
gen tmp = p>0 & !missing(p) & y>$cutoffPlacebo
egen survive = max(tmp), by(hrm)
keep if survive==1
drop tmp survive
tsset hrm y
tsfill, full  // Add the absent firm-years; their counts are set to zero below
replace p=0 if p==.
*replace granted=0 if granted==.

* Cumulative sums within firm; y is named as an explicit sort key so the
* running sum does not depend on the row order left by earlier commands
bysort hrm (y): gen P = sum(p)
*bys hrm: gen Pgra = sum(granted)
label var P "Cumulative patenting since 1965"

* Firm-level baseline: the 1975 value of P, copied to every year of the firm
gen tmp = P if y==1975
egen P75 = max(tmp), by(hrm)
drop tmp
*gen tmp = Pgra if y==1975
*egen Pgra75 = max(tmp), by(hrm)
*drop tmp
drop if y<1980

* Rebuild headq from the lookup table; firms absent from the table are dropped
drop headq
merge m:1 hrm using /tmp/headq, keep(match)
drop _merge
save DDplacebo, replace
*save DDplaceboGranted, replace

* ------------------------------------------
* Compile tariff data
* ------------------------------------------

* File-name suffix used further down to pick the Granted variant of the data
global Granted "Granted"
*global Granted ""

* Predicted tariffs: keep the firm id, the taut and tautApp series, homew and
* MissTar, and insert "Pred" into each name so they can sit next to the
* variables of the same name in the actual-tariff file
*use tauT5_pred, clear
use tauT5d_pred, clear
*keep hrm taut???? tautEx???? homew MissTar
keep hrm taut???? tautApp???? homew MissTar
rename (MissTar homew) (MissTarPred homewPred)
*ren tautEx???? tautExPred????
rename tautApp???? tautAppPred????
rename taut???? tautPred????
save $tmp/tauT5_pred, replace

* Firm-level tariff file: keep ids, firm characteristics and the 1992-2004
* taut, tautApp and tauthq series
*use tauT5b, clear
*use tauT5c, clear
use tauT5d, clear
*keep hrm headq nace Miss* ncty homecty homew taut1992-taut2004 tautEx1992-tautEx2004 tauthq1992-tauthq2004
keep hrm headq nace Miss* ncty homecty homew taut1992-taut2004 tautApp1992-tautApp2004 tauthq1992-tauthq2004

* Keep only firms that appear in the P85 file
*merge 1:1 hrm using P85$Granted, keep(match) // Drop firms not in main sample
merge 1:1 hrm using P85, keep(match) nogenerate
*drop if MissTar>0 | MissTarhq==1 	// Drop firms that are patenting in countries with missing tariff data

* Attach the remaining series; firms without a match in a using file are kept
merge 1:1 hrm_l2_id using tauM3, keep(match master) keepusing(taum1992-taum2004) nogenerate
merge 1:1 hrm_l2_id using tauS4, keep(match master) keepusing(taus1992-taus2004 tausEx1992-tausEx2004 taushq1992-taushq2004) nogenerate
merge 1:1 hrm_l2_id using lnPall, keep(match master) keepusing(lnPall1992-lnPall2004) nogenerate
*merge 1:1 hrm_l2_id using $tmp/tauT5_pred, keep(match master) keepusing(tautPred1992-tautPred2004 tautExPred1992-tautExPred2004 homewPred Miss*)
merge 1:1 hrm_l2_id using $tmp/tauT5_pred, keep(match master) keepusing(tautPred1992-tautPred2004 tautAppPred1992-tautAppPred2004 homewPred Miss*) nogenerate
merge 1:1 hrm_l2_id using tauT5d_Imp, keep(match master) nogenerate	  // Import tariffs (bilateral)
merge 1:1 hrm_l2_id using tauT5d_InpImp, keep(match master) nogenerate // Input tariffs (bilateral)

* Wide to long: one row per firm and year y
*reshape long taus tausEx taushq taum taut tautEx tauthq lnPall tautPred tautExPred, i(hrm headq) j(y)
*reshape long taus tausEx taushq taum taut tautApp tauthq lnPall tautPred tautExPred, i(hrm headq) j(y)
*reshape long taus tausEx taushq taum taut tautApp tauthq lnPall tautPred tautAppPred, i(hrm headq) j(y)
reshape long taus tausEx taushq taum taut tautApp tauthq lnPall tautPred ///
tautAppPred tautImp tautAppImp tautInpImp tautInpAppImp, i(hrm headq) j(y)
save "$tmp/tmpYY", replace

* ------------------------------------------
* Compile final dataset
* ------------------------------------------
* Settings that select which patent file is merged in below
*global cutoff 2003 // 1991/2003/2008
global cutoff 1991
global Granted "Granted"
*global Granted ""

* Tariff panel restricted to the three years used in estimation
use "$tmp/tmpYY", clear
keep if inlist(y, 1992, 2000, 2004)

* Divide the listed tariff variables by 100
*foreach vv of varlist taus taut tautEx tauthq tautPred tautExPred {
*foreach vv of varlist taus taut tautApp tauthq tautPred tautExPred {
*foreach vv of varlist taus taut tautApp tauthq tautPred tautAppPred {
foreach vv of varlist taus taut tautApp tauthq tautPred tautAppPred tautImp tautAppImp {
  replace `vv' = `vv'/100
}

* Attach the patent variables; only firm-years present in both files survive
merge 1:1 hrm y using DD${Granted}${cutoff}, keep(match) nogenerate

* Logs of the patent measures; C2 shifts C by one before the log is taken
generate C2 = C+1
*foreach vv of varlist P Ptri Pgra Pgra85 P85 C* Mbar Ibar IPCbar {
foreach vv of varlist P Ptri P85 C* Mbar Ibar IPCbar  {
  generate ln`vv' = log(`vv')
}

tsset hrm y

* Long differences over 8 and over 4 years: L8x = x - l8.x and L4x = x - l4.x.
* Any existing variables with the same prefix are removed first.
foreach k in 8 4 {
  capture drop L`k'*
  foreach vv of varlist p P Cbar Mbar Ibar IPCbar ln* tau* cit* {
    generate L`k'`vv' = `vv' - l`k'.`vv'
  }
}

* Difference of differences: the 4-year change minus the 8-year change
* observed four years earlier
capture drop LL*
*foreach vv of varlist lnP lnPgra taut tauthq tautPred tautEx lnPall lnCbar lnMbar lnIbar lnIPCbar {
*foreach vv of varlist lnP taut tauthq tautPred tautEx lnPall lnCbar lnMbar lnIbar lnIPCbar {
*foreach vv of varlist lnP taut tauthq tautPred tautApp lnPall lnCbar lnMbar lnIbar lnIPCbar {
foreach vv of varlist lnP taut tauthq tautPred tautApp tautImp tautAppImp lnPall lnCbar lnMbar lnIbar lnIPCbar {
  generate LL`vv' = L4`vv' - l4.L8`vv'
}

* Binary outcome: Kplus9200 and Kplus0004 equal 1 for a firm that has p>0 in
* some row whose year lies inside the window (lower bound exclusive, upper
* bound inclusive). Each list entry is: name tag, lower year, upper year.
foreach window in "9200 1992 2000" "0004 2000 2004" {
  gettoken tag bounds : window
  gettoken lo hi : bounds
  generate tmp = p>0 & y>`lo' & y<=`hi'
  egen Kplus`tag' = max(tmp), by(hrm)
  drop tmp
}
generate KplusD = Kplus0004-Kplus9200

* Group id for nace2_1 x headq cells, and a numeric coding of headq
*replace nace2_1 = 99 if nace2_1==.
egen id = group(nace2_1 headq)
encode headq, generate(hq)

* Complement of homew
generate exw = 1-homew

*save reg_variables, replace
*save reg_variables3${Granted}, replace
*save reg_variables4${Granted}, replace
*save reg_variables5${Granted}, replace
save reg_variables6c${Granted}, replace

/* * Identify the survivors
use reg_variables_survivors, clear
contract hrm
save $tmp/tmp_surv, replace */
 


* -----------------------------------------------------------------------------
* Compile WTO regression dataset
* -----------------------------------------------------------------------------
* Working directory, scratch directory and raw PATSTAT location
cd /Users/fcoell/Dropbox/PATSTAT/data
global tmp "/tmp"
global dta "/Users/fcoell/Dropbox/PATSTAT_spring2015"

* File-name suffix of the regression dataset to use
global Granted "Granted"

capture set more off

* -----------------------------------------------------------------------------
* Get WTO membership
* -----------------------------------------------------------------------------
* WTO entry year by ISO3 code; the code is renamed to iso_alpha3, the name
* used in the tls801 country table
import delimited Ctry_heterog_data/wto_members.csv, varnames(1) encoding(utf8) clear
rename iso3 iso_alpha3
keep iso_alpha3 entry_year
save $tmp/wtomembers, replace 

* Country table: trim the ISO3 code, then keep rows that have a code and
* whose country code is not DL.
* DDR has 2 country codes in Patstat: DD, DL. Only DD is in our dataset.
use $dta/tls801/tls801, clear
replace iso_alpha3 = strtrim(iso_alpha3)
keep if iso_alpha3 != "" & ctry_code != "DL"
save $tmp/tmp801, replace

* WTO status for every headquarters country in the estimation sample.
* The country list is read from reg_variables6c, the file this script saves
* and later merges against this table with assert(match), so the list of
* headq values always agrees with the data it is matched to.
use reg_variables6c${Granted}, clear
/* use reg_variables6b${Granted}, clear */
contract headq
drop _freq
rename headq ctry_code
merge 1:1 ctry_code using $tmp/tmp801, keep(match master)
// Replace the ISO3 codes of Taiwan and Romania so they merge with the WTO codes
replace iso_alpha3 = "CHT" if iso_alpha3 == "TWN" & ctry_code == "TW"
replace iso_alpha3 = "ROM" if iso_alpha3 == "ROU" & ctry_code == "RO"
// Make a placeholder ISO code ("X" + country code) for headq values that
// have no row in the country table, and mark their state indicator as "N"
replace iso_alpha3 = "X" + ctry_code if iso_alpha3 == "" & _merge == 1
replace state_indicator = "N" if _merge == 1
drop _merge continent eu_member epo_member oecd_member discontinued
rename ctry_code headq
merge 1:1 iso_alpha3 using $tmp/wtomembers, keep(match master)
// wto: country found in the WTO list; wto1995: found with entry year 1995
gen wto = _merge == 3
gen wto1995 = _merge == 3 & entry_year == 1995
rename entry_year wto_entry_year
drop _merge
tab wto wto1995
save WTOmembers_sample${Granted}, replace

* -----------------------------------------------------------------------------
* Get applicant sector
/* Note: an applicant can be assigned to multiple sectors.
- If an applicant is classified as a firm at least once, we treat it as a
firm, even if it is also assigned to other sectors.
- If an applicant is not a firm and is assigned to any other sector at least
once, it is classified as other sector, even if it is also an
individual. */
* -----------------------------------------------------------------------------
// Reduce the applicant-sector records to one row of sector flags per applicant
use $dta/tls906/tls906_all, clear
replace sector = strtrim(sector)
contract hrm_l2_id sector
drop _freq
tab sector
mdesc sector
// sector is missing in 60% of the raw data
generate firm = (sector == "COMPANY")
generate individual = (sector == "INDIVIDUAL")
generate unknown_sector = (missing(sector) | sector == "UNKNOWN")
generate other_sector = !(firm | individual | unknown_sector)
// A flag is 1 if the applicant has at least one record of that sector;
// unknown_sector is 1 only if every record of the applicant is unknown
collapse (max) firm individual other_sector (min) unknown_sector, by(hrm_l2_id)
// Resolve overlaps with the priority firm > other sector > individual:
// individual is cleared when the applicant is also a firm or another sector,
// and other_sector is cleared when the applicant is also a firm
replace individual = 0 if firm == 1 | other_sector == 1
replace other_sector = 0 if firm == 1
save hrm_sector, replace

* -----------------------------------------------------------------------
* Compile data
* -----------------------------------------------------------------------
* Start from the regression dataset and attach country- and applicant-level
* information
*use reg_variables6${Granted}, clear
use reg_variables6c${Granted}, clear

// WTO membership by headq; the merge stops with an error unless every row
// on both sides finds a match
merge m:1 headq using WTOmembers_sample${Granted}, assert(match) nogenerate

// Applicant sector flags; applicants without sector data are kept
merge m:1 hrm_l2_id using hrm_sector, keep(match master) nogenerate

save reg_variables6c${Granted}_WTO, replace
